1 Setup

# set working directory
setwd("~/GitHub/CorticalFoldingHumans_deaging_harmonization")
# load packages ----
library(tidyverse)
library(readxl)
library(stringr)
library(lubridate)
library(kableExtra)
library(ggpubr)
library(broom)
library(MASS)
library(irr)
library(effects)
library(lme4)
library(lsmeans)
# load datasets ----
# removing OASIS as it does not have lobes information

data <- read_csv("data.csv") %>%
  filter(Sample != "OASIS")
# estimate localGI, k, K, S and I
# correct for gaussian curvature for lobes estimates
# estimate localGI, k, K, S and I for lobes

data <- data %>%
  mutate(
    logAvgThickness = log10(AvgThickness),
    logTotalArea = log10(TotalArea),
    logExposedArea = log10(ExposedArea),
    localGI = TotalArea / ExposedArea,
    k = sqrt(AvgThickness) * TotalArea / (ExposedArea ^ 1.25),
    K = 1 / 4 * log10(AvgThickness^ 2)  + log10(TotalArea) - 5 / 4 * log10(ExposedArea),
    S = 3 / 2 * log10(TotalArea) + 3 / 4 * log10(ExposedArea) - 9 /  4 * log10(AvgThickness^2),
    I = log10(TotalArea) + log10(ExposedArea) + log10(AvgThickness^ 2),
    Knorm = K / sqrt(1 + (1 / 4) ^ 2 + (5 / 4) ^ 2),
    Snorm = S / sqrt((3 / 2) ^ 2 + (3 / 4) ^ 2 + (9 / 4) ^ 2),
    Inorm = I / sqrt(1 ^ 2 + 1 ^ 2 + 1 ^ 2),
    c = as.double(ifelse(ROI == "hemisphere", NA, 4 * pi / GaussianCurvature)),
    TotalArea_corrected = ifelse(ROI == "hemisphere", TotalArea, TotalArea * c),
    ExposedArea_corrected = ifelse(ROI == "hemisphere", ExposedArea, ExposedArea * c),
    logTotalArea_corrected = log10(TotalArea_corrected),
    logExposedArea_corrected = log10(ExposedArea_corrected),
    localGI_corrected = TotalArea_corrected / ExposedArea_corrected,
    k_corrected = sqrt(AvgThickness) * TotalArea_corrected / (ExposedArea_corrected ^ 1.25),
    K_corrected = log10(TotalArea_corrected) - 5 / 4 * log10(ExposedArea_corrected) + 1 / 4 * log10(AvgThickness^ 2),
    I_corrected = log10(TotalArea_corrected) + log10(ExposedArea_corrected) + log10(AvgThickness^ 2),
    S_corrected = 3 / 2 * log10(TotalArea_corrected) + 3 / 4 * log10(ExposedArea_corrected) - 9 /  4 * log10(AvgThickness^ 2) 
  )
# Define variables as factor ----
data$ROI <- as.factor(data$ROI)
data$Diagnostic <- as.factor(data$Diagnostic)
data$Sample <- as.factor(data$Sample)
data$Gender <- as.factor(data$Gender)
# select CTL subjects ----

data <- data %>% filter(Diagnostic == "CTL")
# target age for deaging ----
Age.cor = 0 

2 Dataset description

Sample N age age_range AvgT AT AE
ADNI 868 75 ± 6.5 56 ; 96 2.4 ± 0.11 97000 ± 8400 39000 ± 2700
AHEAD 100 42 ± 19 24 ; 76 2.6 ± 0.15 110000 ± 10000 39000 ± 2700
AOMICPIOP1 208 22 ± 1.8 18 ; 26 2.6 ± 0.084 110000 ± 10000 40000 ± 2700
AOMICPIOP2 224 22 ± 1.8 18 ; 26 2.6 ± 0.089 110000 ± 9100 39000 ± 2700
HCPr900 881 29 ± 3.6 24 ; 37 2.7 ± 0.09 110000 ± 11000 40000 ± 3000
IXI 563 49 ± 16 20 ; 86 2.5 ± 0.14 97000 ± 10000 39000 ± 3100
NKI 168 34 ± 19 4 ; 85 2.5 ± 0.14 110000 ± 11000 40000 ± 2800
Sample N age age_range K S I
ADNI 868 75 ± 6.5 56 ; 96 -0.56 ± 0.017 9.2 ± 0.13 10 ± 0.074
AHEAD 100 42 ± 19 24 ; 76 -0.5 ± 0.023 9.2 ± 0.13 10 ± 0.094
AOMICPIOP1 208 22 ± 1.8 18 ; 26 -0.49 ± 0.014 9.1 ± 0.094 10 ± 0.078
AOMICPIOP2 224 22 ± 1.8 18 ; 26 -0.51 ± 0.013 9.1 ± 0.093 10 ± 0.078
HCPr900 881 29 ± 3.6 24 ; 37 -0.5 ± 0.013 9.1 ± 0.11 10 ± 0.082
IXI 563 49 ± 16 20 ; 86 -0.56 ± 0.022 9.2 ± 0.14 10 ± 0.1
NKI 168 34 ± 19 4 ; 85 -0.52 ± 0.027 9.2 ± 0.12 10 ± 0.094

3 Deaging

3.1 Is it a linear function?

Visual verification of morphological variables behaviour with aging. Considering not all brain regions ages in the same rate, we split it in the desired ROI.

3.1.1 For the hemisphere

3.1.1.1 Base measurements

ap1_b <- ggplot(filter(data, ROI == "hemisphere"), aes(x = Age, y = TotalArea)) +
  geom_point(aes(alpha =0.4, color = Sample, fill = Sample ), 
             size = 4, 
             pch = 21, 
             colour = 'white') +
  geom_smooth(method = "lm", color = "black") +
  stat_cor() +
    guides(alpha= "none") +
  theme_pubr()+
    scale_x_continuous(name ="Age", 
                    limits=c(0,100),
                breaks=c(seq(0,100,25)))

ap1_b

ap1_c <- ggplot(filter(data, ROI == "hemisphere"), aes(x = Age, y = ExposedArea)) +
  geom_point(aes(alpha =0.4, color = Sample, fill = Sample ), 
             size = 4, 
             pch = 21, 
             colour = 'white') +
  geom_smooth(method = "lm", color = "black") +
  stat_cor() +
    guides(alpha= "none") +
  theme_pubr()+
    scale_x_continuous(name ="Age", 
                    limits=c(0,100),
                breaks=c(seq(0,100,25)))

ap1_c

3.1.1.2 Novel measurements

ap1_e <- ggplot(filter(data, ROI == "hemisphere"), aes(x = Age, y = S)) +
  geom_point(aes(alpha =0.4, color = Sample, fill = Sample ), 
             size = 4, 
             pch = 21, 
             colour = 'white') +
  geom_smooth(method = "lm", color = "black") +
  stat_cor() +
    guides(alpha= "none") +
  theme_pubr()+
    scale_x_continuous(name ="Age", 
                    limits=c(0,100),
                breaks=c(seq(0,100,25)))

ap1_e

ap1_f <- ggplot(filter(data, ROI == "hemisphere"), aes(x = Age, y = I)) +
  geom_point(aes(alpha =0.4, color = Sample, fill = Sample), 
             size = 4, 
             pch = 21, 
             colour = 'white') +
  geom_smooth(method = "lm", color = "black") +
  stat_cor() +
  guides(alpha= "none") +
  theme_pubr()+
    scale_x_continuous(name ="Age", 
                    limits=c(0,100),
                breaks=c(seq(0,100,25)))

ap1_f

# Generate and save figures
ap1 <- 
  ggarrange(ap1_a, ap1_b, ap1_c, ap1_d, ap1_e, ap1_f, labels = c("A","B", "C", "D", "E", "F"), nrow = 3, ncol = 2, font.label = list(size = 11), common.legend = TRUE, legend = "top")

# ggsave("ap1.pdf", ap1, dpi=1200, width = 17.8, height = 17.8, units = "cm", device = "pdf")

3.1.2 For the lobes

3.1.2.1 Base measurements

ggplot(filter(data, ROI != "hemisphere"), aes(x = Age, y = TotalArea)) +
  geom_point(aes(alpha =0.4, color = Sample, fill = Sample), 
             size = 4, 
             pch = 21, 
             colour = 'white') +
  geom_smooth(method = "lm", color = "black") +
  stat_cor() +
  facet_grid(ROI ~ .) +
      guides(alpha= "none") +
  theme_pubr()+
    scale_x_continuous(name ="Age", 
                    limits=c(0,100),
                breaks=c(seq(0,100,25)))

ggplot(filter(data, ROI != "hemisphere"), aes(x = Age, y = ExposedArea)) +
  geom_point(aes(alpha =0.4, color = Sample, fill = Sample), 
             size = 4, 
             pch = 21, 
             colour = 'white') +
  geom_smooth(method = "lm", color = "black") +
  stat_cor() +
  facet_grid(ROI ~ .) +
      guides(alpha= "none") +
  theme_pubr()+
    scale_x_continuous(name ="Age", 
                    limits=c(0,100),
                breaks=c(seq(0,100,25)))

3.1.2.2 Novel measurements

ggplot(filter(data, ROI != "hemisphere"), aes(x = Age, y = S)) +
  geom_point(aes(alpha =0.4, color = Sample, fill = Sample), 
             size = 4, 
             pch = 21, 
             colour = 'white') +
  geom_smooth(method = "lm", color = "black") +
  stat_cor() +
  facet_grid(ROI ~ .) +
      guides(alpha= "none") +
  theme_pubr() +
    scale_x_continuous(name ="Age", 
                    limits=c(0,100),
                breaks=c(seq(0,100,25)))

ggplot(filter(data, ROI != "hemisphere"), aes(x = Age, y = I)) +
  geom_point(aes(alpha =0.4, color = Sample, fill = Sample), 
             size = 4, 
             pch = 21, 
             colour = 'white') +
  geom_smooth(method = "lm", color = "black") +
  stat_cor() +
  facet_grid(ROI ~ .) +
      guides(alpha= "none") +
  theme_pubr() +
    scale_x_continuous(name ="Age", 
                    limits=c(0,100),
                breaks=c(seq(0,100,25)))

3.2 Removing age effect - substracting the slope

We remove the age effect at the base measurements, Total Area, Exposed Area and Cortical Thickness.

For the deaging process, we use Robust Linear Regressions, as it give weights to each data point and reduce the effects of outliers.

The deaging process NEED to be based on CONTROL values. Age also affects brain structure, as seens in the figures above. If the deaging process was based on all subjects, we could remove the disease effect across aging. Also, the deaging process is unique for each ROI. Therefore, we regress the variable for each ROI in an independent process here.

Further, we also need to separate for each Sample. All humans tends to have the same behaviour, but heterogeneous acquisition and processing could imply in a Type II Error addition in the results. This will be handle by the harmonization process.

## AvgThickness ---
# select only CTL subjects with logAvgThickness values.
# divide by each ROI and sample
# perform the robust linear regression and visualize all results as a data frame

decay_AvgThickness <-
  filter(data, Diagnostic == "CTL", !is.na(AvgThickness), !is.nan(AvgThickness), !is.infinite(AvgThickness)) %>% 
  droplevels() %>%
  group_by(ROI, Sample) %>% 
  do(fit_decay_AvgThickness = tidy(rlm(AvgThickness ~ Age, data = .), conf.int=TRUE)) %>% unnest(cols = c(fit_decay_AvgThickness)) 

# display the results for example
decay_AvgThickness %>%
  filter(ROI == "hemisphere") %>%
  kable() %>%
  kable_styling()
ROI Sample term estimate std.error statistic conf.low conf.high
hemisphere ADNI (Intercept) 2.8705639 0.0297791 96.395401 2.8121980 2.9289298
hemisphere ADNI Age -0.0068515 0.0003969 -17.261100 -0.0076294 -0.0060735
hemisphere AHEAD (Intercept) 2.7944993 0.0193034 144.767398 2.7566654 2.8323332
hemisphere AHEAD Age -0.0057785 0.0004165 -13.874348 -0.0065948 -0.0049622
hemisphere AOMICPIOP1 (Intercept) 2.7919001 0.0533391 52.342438 2.6873573 2.8964429
hemisphere AOMICPIOP1 Age -0.0076430 0.0023971 -3.188472 -0.0123412 -0.0029448
hemisphere AOMICPIOP2 (Intercept) 2.9997519 0.0518957 57.803467 2.8980382 3.1014656
hemisphere AOMICPIOP2 Age -0.0177330 0.0023559 -7.527025 -0.0223506 -0.0131155
hemisphere HCPr900 (Intercept) 2.7921977 0.0166452 167.747567 2.7595736 2.8248218
hemisphere HCPr900 Age -0.0035976 0.0005716 -6.294081 -0.0047179 -0.0024773
hemisphere IXI (Intercept) 2.7075605 0.0095766 282.725861 2.6887907 2.7263303
hemisphere IXI Age -0.0048540 0.0001865 -26.032197 -0.0052194 -0.0044885
hemisphere NKI (Intercept) 2.6985922 0.0122927 219.528357 2.6744990 2.7226854
hemisphere NKI Age -0.0045080 0.0003168 -14.230010 -0.0051289 -0.0038871

For removing the age effect, we are insterested in the slope. The slope represents the, supposed for this task, constant rate of decreasing of each variable. Future works can improve this process by describing this behaviour with higher levels functions. We will then create a new variable, called c, as the correction constant. To describe fully, we also include the standard error of each regression slope.

decay_AvgThickness <- filter(decay_AvgThickness,term == "Age") %>%
  mutate(c_AvgThickness = estimate,
         std_error_c_AvgThickness = std.error) %>%
  dplyr::select(c(ROI, Sample, c_AvgThickness, std_error_c_AvgThickness))

This process will be repeated for the Total Area and Exposed Area.

## TotalArea ---
decay_TotalArea <- filter(data, Diagnostic == "CTL", !is.na(TotalArea), !is.nan(TotalArea), !is.infinite(TotalArea)) %>%
  droplevels() %>%
  group_by(ROI, Sample) %>%
  do(fit_decay_TotalArea = tidy(rlm(TotalArea ~ Age, data = .),conf.int=TRUE)) %>%
  unnest(cols = c(fit_decay_TotalArea))

decay_TotalArea <- filter(decay_TotalArea, term == "Age") %>%
  mutate(c_TotalArea = estimate,
         std_error_c_TotalArea = std.error) %>%
  dplyr::select(c(ROI, Sample, c_TotalArea, std_error_c_TotalArea))

## ExposedArea ---
decay_ExposedArea <- filter(data, Diagnostic == "CTL", !is.na(ExposedArea), !is.nan(ExposedArea), !is.infinite(ExposedArea)) %>%
  droplevels() %>%
  group_by(ROI, Sample) %>%
  do(fit_decay_ExposedArea = tidy(rlm(ExposedArea ~ Age, data = .), conf.int = TRUE)) %>%
  unnest(cols = c(fit_decay_ExposedArea))

decay_ExposedArea <- filter(decay_ExposedArea, term == "Age")  %>%
  mutate(c_ExposedArea = estimate,
         std_error_c_ExposedArea = std.error) %>%
  dplyr::select(c(ROI, Sample, c_ExposedArea, std_error_c_ExposedArea))

With the correction constants, we will correct every variable and then estimate K, S and I and its normalized values. It is possible to specify any Age reference for the deaging.

Considering the linear function \(y = ax + b\), we will estimate the new y trying to have an slope closer to zero (as the original slope will be subtract by its own approximation extracted from the robust linear regression). We have them \(y\_{new} = y - a(x - x\_{new})\), in our terms, \(y\_{new} = y - c(x - Age.cor)\).

data <- full_join(data, decay_AvgThickness) %>%
  full_join(decay_TotalArea) %>%
  full_join(decay_ExposedArea) %>%
  mutate(
    AvgThickness_age_decay = AvgThickness - c_AvgThickness * (Age - Age.cor),
    TotalArea_age_decay = TotalArea - c_TotalArea * (Age - Age.cor),
    ExposedArea_age_decay = ExposedArea - c_ExposedArea * (Age - Age.cor),
    K_age_decay = log10(TotalArea_age_decay) + 1/4*log10(AvgThickness_age_decay^2) - 5/4*log10(ExposedArea_age_decay),
    I_age_decay = log10(TotalArea_age_decay) + log10(ExposedArea_age_decay) + log10(AvgThickness_age_decay^2),
    S_age_decay = 3/2*log10(TotalArea_age_decay) + 3/4*log10(ExposedArea_age_decay) - 9/4*log10(AvgThickness_age_decay^2),
    Knorm_age_decay = K_age_decay/sqrt(1 + (1/4)^2 + (5/4)^2),
    Snorm_age_decay = S_age_decay/sqrt((3/2)^2 + (3/4)^2 + (9/4)^2),
    Inorm_age_decay = I_age_decay/sqrt(1^2+1^2+1^2))

3.2.1 Result (hemisphere only)

To compare the values, we will have the raw measurements in light gray, and the new values in dark blue

3.2.2 Changing the age reference, or age target

Let’s change to 25 years old. At this age we expect the brain already finished development and did not started aging.

Age.cor = 25 # 

data <- data %>%
  mutate(
    AvgThickness_age_decay = AvgThickness - c_AvgThickness * (Age - Age.cor),
    TotalArea_age_decay = TotalArea - c_TotalArea * (Age - Age.cor),
    ExposedArea_age_decay = ExposedArea - c_ExposedArea * (Age - Age.cor),
    K_age_decay = log10(TotalArea_age_decay) + 1/4*log10(AvgThickness_age_decay^2) - 5/4*log10(ExposedArea_age_decay),
    I_age_decay = log10(TotalArea_age_decay) + log10(ExposedArea_age_decay) + log10(AvgThickness_age_decay^2),
    S_age_decay = 3/2*log10(TotalArea_age_decay) + 3/4*log10(ExposedArea_age_decay) - 9/4*log10(AvgThickness_age_decay^2),
    Knorm_age_decay = K_age_decay/sqrt(1 + (1/4)^2 + (-5/4)^2),
    Snorm_age_decay = S_age_decay/sqrt((3/2)^2 + (3/4)^2 + (9/4)^2),
    Inorm_age_decay = I_age_decay/sqrt(1^2+1^2+1^2))

How changing the correction Age affects of Cortical Thickness and K deaging:

4 Harmonization

The harmonization procedure is useful to combine multiple datasets to improve the statistical power, expand the findings to other Samples and studies and validating the findings of a local Sample to global results. The heterogeneous methodology limits these comparisons, so we will handle the differences to work the data.

The harmonization procedure follow the same premises as the deaging, but here we assume all samples follow the same trend. We will them remove the Type II Error by subtracting for each datapoint the residue of its sample.

4.1 Visualization of Type II Error

If we extrapolate the regression lines we have:

We can see how the intercepts at 0 years old are different for the three Samples. With the harmonization procedure, our goal is to have the same intercept for all Samples, because we are assuming this difference is due to the different acquisition and processing materials and methods.

To do this we need to consider the Sample as a random effect in a Linear Mixed Model. The residue will be then the difference between each Sample intercept and a general intercept for all datapoints (black dashed line).

Again, our reference are the Control subjects. To keep it simple, we will harmonize the samples only for the hemisphere in this example. The code including the lobes will be available right after.

data_rate <-
  filter(
    data,
    ROI == "hemisphere",
    Diagnostic == "CTL"
  )

data_rate$Sample <- as.factor(data_rate$Sample)

4.2 Removing the sample effect - substracting the intercept

As in the deaging, we harmonize the raw measurements. We will use the Cortical Thickness as example again.

## AvgThickness \~ Age ----

m.1 <- lme4::lmer(AvgThickness ~ Age + (1|Sample), data = data_rate) # run the linear mixed model

as_tibble(ranef(m.1)) # random effects in tibble format
grpvar term grp condval condsd
Sample (Intercept) ADNI 0.0008163 0.0024061
Sample (Intercept) AHEAD 0.0210192 0.0070004
Sample (Intercept) AOMICPIOP1 -0.0149690 0.0048896
Sample (Intercept) AOMICPIOP2 -0.0289463 0.0047141
Sample (Intercept) HCPr900 0.0833446 0.0023884
Sample (Intercept) IXI -0.0295801 0.0029849
Sample (Intercept) NKI -0.0316847 0.0054318
ap5_a <- ggplot(as_tibble(ranef(m.1)), aes(x = grp, y = condval)) + 
  geom_pointrange(aes(ymin = condval - condsd, ymax = condval + condsd)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_pubr() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
  labs(x = "Sample", y = "Residual (standard deviation)")

ap5_a

# ggsave("ap5_a.pdf", ap5_a, dpi=1200, width = 8.7, height = 8.7, units = "cm", device = "pdf")


re <- as_tibble(ranef(m.1)) %>% # extract the random effects
  mutate(T_shift = condval, Sample = grp) %>% # the condval is the intercept difference for each sample. it is also a constant. here we will refer to it as shift
  dplyr::select(-c(condval, grpvar, term, condsd, grp))

data <- full_join(data, re) %>% mutate(AvgThickness_shiftc = AvgThickness - T_shift) # includign the constant in the dataset and estimanting the harmonizaed values

Condval (in the figure called “Residual”) equal to zero means the sample intercept is the same as the general regressing line.

## TotalArea \~ Age ----

m.1 <- lme4::lmer(TotalArea ~ Age + (1|Sample), data = data_rate)

re <- as_tibble(ranef(m.1)) %>%
  filter(grpvar == "Sample") %>%
  mutate(AT_shift = condval, Sample = grp) %>%
  dplyr::select(-c(condval, grpvar, term, condsd, grp))

data <- full_join(data, re) %>% mutate(TotalArea_shiftc = TotalArea - AT_shift)

## ExposedArea \~ Age ----

m.1 <- lme4::lmer(ExposedArea ~ Age + (1|Sample), data = data_rate)

re <- as_tibble(ranef(m.1)) %>%
  filter(grpvar == "Sample") %>%
  mutate(AE_shift = condval, Sample = grp) %>%
  dplyr::select(-c(condval, grpvar, term, condsd, grp))

data <- full_join(data, re) %>% mutate(ExposedArea_shiftc = ExposedArea - AE_shift)

Re-estimating values:

data <- data %>%
  mutate(
    AvgThickness_shiftc = AvgThickness - c_AvgThickness * (Age - Age.cor),
    TotalArea_shiftc = TotalArea - c_TotalArea * (Age - Age.cor),
    ExposedArea_shiftc = ExposedArea - c_ExposedArea * (Age - Age.cor),
    K_shiftc = log10(TotalArea_shiftc) + 1/4*log10(AvgThickness_shiftc^2) - 5/4*log10(ExposedArea_shiftc),
    I_shiftc = log10(TotalArea_shiftc) + log10(ExposedArea_shiftc) + log10(AvgThickness_shiftc^2),
    S_shiftc = 3/2*log10(TotalArea_shiftc) + 3/4*log10(ExposedArea_shiftc) - 9/4*log10(AvgThickness_shiftc^2),
    Knorm_shiftc = K_age_decay/sqrt(1 + (1/4)^2 + (5/4)^2),
    Snorm_shiftc = S_age_decay/sqrt((3/2)^2 + (3/4)^2 + (9/4)^2),
    Inorm_shiftc = I_age_decay/sqrt(1^2+1^2+1^2))

4.2.1 Result (hemisphere only)

To compare the values, we will have the raw measurements in light gray, and the new values in dark blue

4.2.1.1 Base measurement - Average Cortical Thickness

Comparing with the raw data:

4.2.1.2 Novel measurement - K

Comparing with the raw data:

5 Deaging and harmonization in one shot

Combining the two processes is simple as extracting the slope from the linear mixed model used for the harmonization. Here, we also included the lobes.

# filter CTL subjects (again, if you are running this script with other diangostics)
data_rate <-
  filter(data, Diagnostic == "CTL")

# assure Sample is a factor
data_rate$Sample <-
  as.factor(data_rate$Sample)

# assure ROI is a factor and is in the desired order
data_rate$ROI <-
  factor(data_rate$ROI,
         levels = c("hemisphere", "F", "O", "P", "T"))

First, we run the lmer, then extract the residual for each sample and the *_shift* constant. After, we extract the Age.trend constant (called c in the previous deaging example). Finally, the code re-estimates the variable in all configurations: harmonized, deaged, and both.

Here, we used the linear version of the variables. It does not affect the final result.

## AvgThickness ---
m.1 <- lme4::lmer(AvgThickness ~ Age * ROI + (1 | Sample:ROI), data = data_rate)

re <- as_tibble(ranef(m.1)) %>%
  filter(grpvar == "Sample:ROI") %>%
  mutate(
    T_shift = condval,
    sd_T_shift = condsd,
    Sample = str_split(grp,
                       pattern = ":", simplify = TRUE)[, 1],
    ROI = str_split(grp,
                    pattern = ":", simplify = TRUE)[, 2]
  ) %>%
  dplyr::select(-c(condval, grpvar, term, condsd, grp))

Age.trend <- as_tibble(lstrends(m.1, ~ ROI, var = "Age")) %>%
  mutate(Age.trend_T = Age.trend) %>%
  dplyr::select(c(ROI, Age.trend_T))

data <- full_join(data, Age.trend) %>%
  full_join(re) %>%
  mutate(
    AvgThickness_shiftc = AvgThickness - T_shift,
    AvgThickness_age_decay = AvgThickness - Age.trend_T * (Age - Age.cor),
    AvgThickness_age_decay_shiftc = AvgThickness - T_shift - Age.trend_T * (Age - Age.cor)
  )


## TotalArea ---
m.1 <- lme4::lmer(TotalArea ~ Age * ROI + (1 | Sample:ROI), data = data_rate)

re <- as_tibble(ranef(m.1)) %>%
  filter(grpvar == "Sample:ROI") %>%
  mutate(
    AT_shift = condval,
    sd_AT_shift = condsd,
    Sample = str_split(grp,
                       pattern = ":", simplify = TRUE)[, 1],
    ROI = str_split(grp,
                    pattern = ":", simplify = TRUE)[, 2]
  ) %>%
  dplyr::select(-c(condval, grpvar, term, condsd, grp))

Age.trend <- as_tibble(lstrends(m.1, ~ ROI, var = "Age")) %>%
  mutate(Age.trend_AT = Age.trend) %>%
  dplyr::select(c(ROI, Age.trend_AT))

data <- full_join(data, re) %>%
  full_join(Age.trend) %>%
  mutate(
    TotalArea_shiftc = TotalArea - AT_shift,
    TotalArea_age_decay = TotalArea - Age.trend_AT * (Age - Age.cor),
    TotalArea_age_decay_shiftc = TotalArea - AT_shift - Age.trend_AT * (Age - Age.cor))

## ExposedArea ---
m.1 <- lme4::lmer(ExposedArea ~ Age * ROI + (1 | Sample:ROI), data = data_rate)

re <- as_tibble(ranef(m.1)) %>%
  filter(grpvar == "Sample:ROI") %>%
  mutate(
    AE_shift = condval,
    sd_AE_shift = condsd,
    Sample = str_split(grp,
                       pattern = ":", simplify = TRUE)[, 1],
    ROI = str_split(grp,
                    pattern = ":", simplify = TRUE)[, 2]
  ) %>%
  dplyr::select(-c(condval, grpvar, term, condsd, grp))

Age.trend <- as_tibble(lstrends(m.1, ~ ROI, var = "Age")) %>%
  mutate(Age.trend_AE = Age.trend) %>%
  dplyr::select(c(ROI, Age.trend_AE))

data <- full_join(data, re) %>%
  full_join(Age.trend) %>%
  mutate(
    ExposedArea_shiftc = ExposedArea - AE_shift,
    ExposedArea_age_decay = ExposedArea - Age.trend_AE * (Age - Age.cor),
    ExposedArea_age_decay_shiftc = ExposedArea - AE_shift - Age.trend_AE * (Age - Age.cor)
  )

Re-estimating K, S, and I:

data <- data %>%
  mutate(
    K_age_decay = log10(TotalArea_age_decay) + 1/4*log10(AvgThickness_age_decay^2) - 5/4*log10(ExposedArea_age_decay),
    K_shiftc = log10(TotalArea_shiftc) + 1/4*log10(AvgThickness_shiftc^2) - 5/4*log10(ExposedArea_shiftc),
    K_age_decay_shiftc = log10(TotalArea_age_decay_shiftc) + 1/4*log10(AvgThickness_age_decay_shiftc^2) - 5/4*log10(ExposedArea_age_decay_shiftc),
    I_age_decay = log10(TotalArea_age_decay) + log10(ExposedArea_age_decay) + log10(AvgThickness_age_decay^2),
    I_shiftc = log10(TotalArea_shiftc) + log10(ExposedArea_shiftc) + log10(AvgThickness_shiftc^2),
    I_age_decay_shiftc = log10(TotalArea_age_decay_shiftc) + log10(ExposedArea_age_decay_shiftc) + log10(AvgThickness_age_decay_shiftc^2),
    S_age_decay = 3/2*log10(TotalArea_age_decay) + 3/4*log10(ExposedArea_age_decay) - 9/4*log10(AvgThickness_age_decay^2),
    S_shiftc = 3/2*log10(TotalArea_shiftc) + 3/4*log10(ExposedArea_shiftc) - 9/4*log10(AvgThickness_shiftc^2),
    S_age_decay_shiftc = 3/2*log10(TotalArea_age_decay_shiftc) + 3/4*log10(ExposedArea_age_decay_shiftc) - 9/4*log10(AvgThickness_age_decay_shiftc^2),
    Knorm_shiftc = K_shiftc / sqrt(1 + (1 / 4) ^ 2 + (5 / 2) ^ 2),
    Snorm_shiftc = S_shiftc / sqrt((3 / 2) ^ 2 + (3 / 4) ^ 2 + (9 / 4) ^ 2),
    Inorm_shiftc = I_shiftc / sqrt(1 ^ 2 + 1 ^ 2 + 1 ^ 2),
    Knorm_age_decay = K_age_decay / sqrt(1 + (1 / 4) ^ 2 + (5 / 2) ^ 2),
    Snorm_age_decay = S_age_decay / sqrt((3 / 2) ^ 2 + (3 / 4) ^ 2 + (9 / 4) ^ 2),
    Inorm_age_decay = I_age_decay / sqrt(1 ^ 2 + 1 ^ 2 + 1 ^ 2),
    Knorm_age_decay_shiftc = K_age_decay_shiftc / sqrt(1 + (1 / 4) ^ 2 + (5 / 2) ^ 2),
    Snorm_age_decay_shiftc = S_age_decay_shiftc / sqrt((3 / 2) ^ 2 +  (3 / 4) ^ 2 + (9 / 4) ^ 2),
    Inorm_age_decay_shiftc = I_age_decay_shiftc / sqrt(1 ^ 2 + 1 ^ 2 + 1 ^ 2)
  )

5.1 Results

To compare the values, we will have the raw measurements in light gray, and the new values in dark blue.

5.1.1 Base measurements

ggplot(filter(data, ROI == "hemisphere")) +
  geom_point(aes(x = Age, y = AvgThickness, alpha = 0.4),  color = "grey") +
  geom_point(aes(x = Age, y = AvgThickness_age_decay_shiftc, alpha = 0.4), color = "darkblue") +
  geom_smooth(method = "lm", aes(x = Age, y = AvgThickness), color = "grey") +
  geom_smooth(method = "lm", aes(x = Age, y = AvgThickness_age_decay_shiftc), color = "darkblue") +
        guides(alpha= "none") +
  theme_pubr()

ggplot(filter(data, ROI == "hemisphere")) +
  geom_point(aes(x = Age, y = TotalArea, alpha = 0.4),  color = "grey") +
  geom_point(aes(x = Age, y = TotalArea_age_decay_shiftc, alpha = 0.4), color = "darkblue") +
  geom_smooth(method = "lm", aes(x = Age, y = TotalArea), color = "grey") +
  geom_smooth(method = "lm", aes(x = Age, y = TotalArea_age_decay_shiftc), color = "darkblue") +
  guides(alpha= "none") +
  theme_pubr()

ggplot(filter(data, ROI == "hemisphere")) +
  geom_point(aes(x = Age, y = ExposedArea, alpha = 0.4),  color = "grey") +
  geom_point(aes(x = Age, y = ExposedArea_age_decay_shiftc, alpha = 0.4), color = "darkblue") +
  geom_smooth(method = "lm", aes(x = Age, y = ExposedArea), color = "grey") +
  geom_smooth(method = "lm", aes(x = Age, y = ExposedArea_age_decay_shiftc), color = "darkblue") +
  guides(alpha= "none") +
  theme_pubr()

5.1.1.1 Comparing with the raw data:

t_a <- ggplot(filter(data, ROI == "hemisphere"), aes(x = Age, y = AvgThickness,  color = Sample, fill = Sample), alpha = 0.4) +
  geom_point(aes(alpha =0.4)) +
  geom_smooth(method = "lm") +
  guides(alpha = "none") + # added some transparency to improve visualization  
  theme_pubr() +
  xlim(0,100) # human lifespan

 t_b <- ggplot(filter(data, ROI == "hemisphere"), aes(x = Age, y = AvgThickness), alpha = 0.4) +
  geom_smooth(method = "lm", fullrange = TRUE, aes( color = Sample, fill = Sample)) +
    geom_smooth(method = "lm", fullrange = TRUE, color = "black", linetype = "dashed") +
  guides(alpha = "none") + # added some transparency to improve visualization  
  theme_pubr() +
  xlim(0,100) # human lifespan

t_c <- ggplot(filter(data, ROI == "hemisphere"), aes(x = Age, y = AvgThickness_age_decay_shiftc,  color = Sample, fill = Sample), alpha = 0.4) +
  geom_point(aes(alpha =0.4)) +
  geom_smooth(method = "lm") +
  guides(alpha = "none") + # added some transparency to improve visualization  
  theme_pubr() +
  xlim(0,100) # human lifespan

 t_d <- ggplot(filter(data, ROI == "hemisphere"), aes(x = Age, y = AvgThickness_age_decay_shiftc), alpha = 0.4) +
  geom_smooth(method = "lm", fullrange = TRUE, aes( color = Sample, fill = Sample)) +
    geom_smooth(method = "lm", fullrange = TRUE, color = "black", linetype = "dashed") +
  guides(alpha = "none") + # added some transparency to improve visualization  
  theme_pubr() +
  xlim(0,100) # human lifespan
 
 ggarrange(t_a, t_b, t_c, t_d, common.legend = TRUE, ncol = 2)
## $`1`

## 
## $`2`

## 
## attr(,"class")
## [1] "list"      "ggarrange"

5.1.2 Novel measurements

ggplot(filter(data, ROI == "hemisphere")) +
  geom_point(aes(x = Age, y = K, alpha = 0.4),  color = "grey") +
  geom_point(aes(x = Age, y = K_age_decay_shiftc, alpha = 0.4), color = "darkblue") +
  geom_smooth(method = "lm", aes(x = Age, y = K), color = "grey") +
  geom_smooth(method = "lm", aes(x = Age, y = K_age_decay_shiftc), color = "darkblue") +
      guides(alpha= "none") +
  theme_pubr()

ggplot(filter(data, ROI == "hemisphere")) +
  geom_point(aes(x = Age, y = S, alpha = 0.4),  color = "grey") +
  geom_point(aes(x = Age, y = S_age_decay_shiftc, alpha = 0.4), color = "darkblue") +
  geom_smooth(method = "lm", aes(x = Age, y = S), color = "grey") +
  geom_smooth(method = "lm", aes(x = Age, y = S_age_decay_shiftc), color = "darkblue") +
      guides(alpha= "none") +
  theme_pubr()

ggplot(filter(data, ROI == "hemisphere")) +
  geom_point(aes(x = Age, y = I, alpha = 0.4),  color = "grey") +
  geom_point(aes(x = Age, y = I_age_decay_shiftc, alpha = 0.4), color = "darkblue") +
  geom_smooth(method = "lm", aes(x = Age, y = I), color = "grey") +
  geom_smooth(method = "lm", aes(x = Age, y = I_age_decay_shiftc), color = "darkblue") +
      guides(alpha= "none") +
  theme_pubr()

5.1.2.1 Comparing with the raw data:

k_a <- ggplot(filter(data, ROI == "hemisphere"), aes(x = Age, y = K,  color = Sample, fill = Sample), alpha = 0.4) +
  geom_point(aes(alpha =0.4), 
             size = 4, 
             pch = 21, 
             colour = 'white') +
  geom_smooth(method = "lm") +
  guides(alpha = "none") + # added some transparency to improve visualization  
  theme_pubr() +
  xlim(0,100) # human lifespan

k_b <- ggplot(filter(data, ROI == "hemisphere"), aes(x = Age, y = K), alpha = 0.4) +
  geom_smooth(method = "lm", fullrange = TRUE, aes( color = Sample, fill = Sample)) +
    geom_smooth(method = "lm", fullrange = TRUE, color = "black", linetype = "dashed") +
  guides(alpha = "none") + # added some transparency to improve visualization  
  theme_pubr() +
  xlim(0,100) # human lifespan

k_c <- ggplot(filter(data, ROI == "hemisphere"), aes(x = Age, y = K_age_decay_shiftc,  color = Sample, fill = Sample), alpha = 0.4) +
  geom_point(aes(alpha =0.4), 
             size = 4, 
             pch = 21, 
             colour = 'white') +
  geom_smooth(method = "lm") +
  guides(alpha = "none") + # added some transparency to improve visualization  
  theme_pubr() +
  xlim(0,100) # human lifespan

k_d <- ggplot(filter(data, ROI == "hemisphere"), aes(x = Age, y = K_age_decay_shiftc), alpha = 0.4) +
  geom_smooth(method = "lm", fullrange = TRUE, aes( color = Sample, fill = Sample)) +
    geom_smooth(method = "lm", fullrange = TRUE, color = "black", linetype = "dashed") +
  guides(alpha = "none") + # added some transparency to improve visualization  
  theme_pubr() +
  xlim(0,100) # human lifespan
 
ap8 <- ggarrange(k_a, k_b, k_c, k_d, labels = c("A","B","C","D"), nrow =2, ncol = 2, font.label = list(size = 11), common.legend = TRUE, legend = "top")
ap8

# ggsave("ap8.pdf", ap8, dpi=1200, width = 17.8, height = 17.8, units = "cm", device = "pdf")

6 Limitations and expectations from future works

This is by necessity a simplistic, one-parameter way of modeling aging; but this very simplicity allows a direct generalization to a broad set of morphological variables. In what follows we discuss this method’s known limitations.

Both processes are still dependable on a Control subset of the data. We use the control subset as the reference since the deaging, and the harmonization can remove unwanted effects if applied directly to nontypical or pathological populations. Although, it could be helpful to measure the difference in morphological variables due to different methodologies in the same Sample (e.g., comparing acquisition protocols).

We could use higher-level functions or split the data into periods of years to avoid considering a constant rate of aging for all human lifespan. Here we are limited by the available and processed data. International consortium and open datasets should fulfill this task. We also want to keep the processes as simple as possible.

We also expect future works to provide rates from smaller brain regions, leading to studies in precise ROI.